knitr::opts_chunk$set(echo = TRUE)
#Tanay Dangaich
#import libraries
library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ dplyr 1.1.0
## ✔ tibble 3.1.8 ✔ stringr 1.5.0
## ✔ tidyr 1.3.0 ✔ forcats 1.0.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggcorrplot)
#read csv file and store in gender
gender <- read_csv("~/Documents/Courses/Multivariate Analysis/gender.csv")
## New names:
## Rows: 131 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Gender, Occupation, Education Level, Marital Status, Favorite Color dbl
## (4): Age, Height (cm), Weight (kg), Income (USD) lgl (1): ...10
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...10`
gender
## # A tibble: 131 × 10
## Gender Age Height (…¹ Weigh…² Occup…³ Educa…⁴ Marit…⁵ Incom…⁶ Favor…⁷ ...10
## <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr> <lgl>
## 1 male 32 175 70 Softwa… Master… Married 75000 Blue NA
## 2 male 25 182 85 Sales … Bachel… Single 45000 Green NA
## 3 female 41 160 62 Doctor Doctor… Married 120000 Purple NA
## 4 male 38 178 79 Lawyer Bachel… Single 90000 Red NA
## 5 female 29 165 58 Graphi… Associ… Single 35000 Yellow NA
## 6 male 45 190 92 Busine… Master… Divorc… 110000 Black NA
## 7 female 27 163 55 Market… Bachel… Single 50000 Pink NA
## 8 male 52 179 83 CEO Doctor… Married 500000 Blue NA
## 9 female 31 168 61 Projec… Bachel… Married 80000 Green NA
## 10 male 36 177 76 Engine… Master… Married 95000 Orange NA
## # … with 121 more rows, and abbreviated variable names ¹`Height (cm)`,
## # ²`Weight (kg)`, ³Occupation, ⁴`Education Level`, ⁵`Marital Status`,
## # ⁶`Income (USD)`, ⁷`Favorite Color`
#male matrix, colMeans, covariance and correlation
# Work on a plain data.frame copy of the imported tibble.
df <- as.data.frame(gender)
View(df)
# Drop columns 5-7 and 9-10 in one step (Occupation, Education Level,
# Marital Status, Favorite Color and the empty ...10 column), leaving
# Gender plus the four numeric measurements.
df_1 <- df[, -c(5:7, 9:10)]
View(df_1)
# Keep only the rows recorded as male, then remove the Gender column so that
# the numeric summaries below can be computed.
male <- df_1 %>% filter(Gender == "male")
male.nums <- male[, -1]
View(male.nums)
#colMeans
colMeans(male.nums)
## Age Height (cm) Weight (kg) Income (USD)
## 38.30882 180.14706 82.75000 121250.00000
#covariance
cov(male.nums)
## Age Height (cm) Weight (kg) Income (USD)
## Age 2.785843e+01 9.730026e+00 18.30224 300205.2
## Height (cm) 9.730026e+00 1.379895e+01 16.90299 110858.2
## Weight (kg) 1.830224e+01 1.690299e+01 27.53358 192854.5
## Income (USD) 3.002052e+05 1.108582e+05 192854.47761 8454757462.7
#correlation
cor_male <- cor(male.nums)
cor_male
## Age Height (cm) Weight (kg) Income (USD)
## Age 1.0000000 0.4962640 0.6608373 0.6185707
## Height (cm) 0.4962640 1.0000000 0.8671797 0.3245595
## Weight (kg) 0.6608373 0.8671797 1.0000000 0.3997126
## Income (USD) 0.6185707 0.3245595 0.3997126 1.0000000
ggcorrplot(cor_male)
Inference: Height and weight are highly correlated for males. Income and age are positively correlated: the older the person, the more they tend to earn. Age and weight also have a positive correlation.
#female matrix, colMeans, covariance and correlation
# Keep only the rows recorded as female.
female <- filter(df_1, Gender == "female")
# Fix: take the numeric columns from the female subset. The earlier code used
# df_1[,-1], i.e. every row of the data, so the "female" means, covariance and
# correlation were really whole-sample statistics.
# NOTE: the knitted output shown after these statements was produced before
# this fix and has to be regenerated.
female.nums <- female[, -1]
View(female.nums)
#colMeans
colMeans(female.nums)
## Age Height (cm) Weight (kg) Income (USD)
## 34.56489 173.19847 71.45802 93206.10687
#covariance
cov_female <- cov(female.nums)
cov_female
## Age Height (cm) Weight (kg) Income (USD)
## Age 35.81691 34.97164 59.40082 293482.7
## Height (cm) 34.97164 64.72954 99.23147 271781.9
## Weight (kg) 59.40082 99.23147 159.97322 455174.1
## Income (USD) 293482.67763 271781.85555 455174.10452 5482718731.7
#correlation
cor_female <- cor(female.nums)
cor_female
## Age Height (cm) Weight (kg) Income (USD)
## Age 1.0000000 0.7263077 0.7847381 0.6622781
## Height (cm) 0.7263077 1.0000000 0.9751570 0.4562168
## Weight (kg) 0.7847381 0.9751570 1.0000000 0.4860220
## Income (USD) 0.6622781 0.4562168 0.4860220 1.0000000
ggcorrplot(cor_female)
Inference: The height and weight are highly correlated for females. Income and age are correlated, the higher the age, the more they are earning. Age and weight also have a positive correlation.
knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(ggplot2)
gender <- read_csv("~/Documents/Courses/Multivariate Analysis/gender.csv")
## New names:
## Rows: 131 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Gender, Occupation, Education Level, Marital Status, Favorite Color dbl
## (4): Age, Height (cm), Weight (kg), Income (USD) lgl (1): ...10
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...10`
str(gender)
## spc_tbl_ [131 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Gender : chr [1:131] "male" "male" "female" "male" ...
## $ Age : num [1:131] 32 25 41 38 29 45 27 52 31 36 ...
## $ Height (cm) : num [1:131] 175 182 160 178 165 190 163 179 168 177 ...
## $ Weight (kg) : num [1:131] 70 85 62 79 58 92 55 83 61 76 ...
## $ Occupation : chr [1:131] "Software Engineer" "Sales Representative" "Doctor" "Lawyer" ...
## $ Education Level: chr [1:131] "Master's Degree" "Bachelor's Degree" "Doctorate Degree" "Bachelor's Degree" ...
## $ Marital Status : chr [1:131] "Married" "Single" "Married" "Single" ...
## $ Income (USD) : num [1:131] 75000 45000 120000 90000 35000 110000 50000 500000 80000 95000 ...
## $ Favorite Color : chr [1:131] "Blue" "Green" "Purple" "Red" ...
## $ ...10 : logi [1:131] NA NA NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. Gender = col_character(),
## .. Age = col_double(),
## .. `Height (cm)` = col_double(),
## .. `Weight (kg)` = col_double(),
## .. Occupation = col_character(),
## .. `Education Level` = col_character(),
## .. `Marital Status` = col_character(),
## .. `Income (USD)` = col_double(),
## .. `Favorite Color` = col_character(),
## .. ...10 = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
# Plain data.frame copy of the imported tibble.
df <- as.data.frame(gender)
View(df)
# Remove columns 5-7 and 9-10 in a single step; Gender and the four numeric
# measurements remain.
df_1 <- df[, -c(5:7, 9:10)]
View(df_1)
library(tidyverse)
# Recode Gender to one letter: "male" becomes "M", every other value "W".
df_1 <- mutate(df_1, Gender = ifelse(Gender %in% "male", "M", "W"))
View(df_1)
# Box plots of the four numeric columns.
boxplot(df_1[, 2:5])
# Star plot of the rows, labelled with the recoded gender.
stars(df_1, labels = df_1$Gender)
library(SciViews)
library(scatterplot3d)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(lattice)
library(ggplot2)
library(ggridges)
library(ggvis)
##
## Attaching package: 'ggvis'
##
## The following object is masked from 'package:ggplot2':
##
## resolution
library(ggthemes)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggthemes':
##
## theme_map
library(gapminder)
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
##
## Attaching package: 'gganimate'
##
## The following object is masked from 'package:ggvis':
##
## view_static
library(dplyr)
library(tidyverse)
library(grid)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(RColorBrewer)
library(hexbin)
names(df_1)
## [1] "Gender" "Age" "Height (cm)" "Weight (kg)" "Income (USD)"
ggscatmat(df_1, columns=2:5, color="Gender")
## Warning: The dot-dot notation (`..scaled..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(scaled)` instead.
## ℹ The deprecated feature was likely used in the GGally package.
## Please report the issue at <]8;;https://github.com/ggobi/ggally/issueshttps://github.com/ggobi/ggally/issues]8;;>.
# ggplot
#Using ggplot to plot the different variables against gender
# Each numeric variable plotted against Gender, points coloured by gender.
ggplot(df_1, aes(x = Gender, y = Age, color = Gender)) + geom_point()
ggplot(df_1, aes(x = Gender, y = `Height (cm)`, color = Gender)) + geom_point()
ggplot(df_1, aes(x = Gender, y = `Weight (kg)`, color = Gender)) + geom_point()
ggplot(df_1, aes(x = Gender, y = `Income (USD)`, color = Gender)) + geom_point()
# Income against age, coloured by gender.
ggplot(df_1, aes(x = Age, y = `Income (USD)`, color = Gender)) + geom_point()
# The same scatter drawn with point symbol 17 and explicit axis labels/title.
ggplot(df_1, aes(x = Age, y = `Income (USD)`, color = Gender)) +
  geom_point(pch = 17) +
  labs(x = "Age", y = "Income in USD", title = "Gender data")
# bar chart
ggplot(df_1, aes(Age)) + geom_bar(position="stack",aes(color=Gender))
ggplot(df_1, aes(`Height (cm)`)) + geom_bar(position="stack",aes(color=Gender))
ggplot(df_1, aes(`Weight (kg)`)) + geom_bar(position="stack",aes(color=Gender))
ggplot(df_1, aes(`Income (USD)`)) + geom_bar(position="stack",aes(color=Gender))
ggplot(df_1, aes(Age)) + facet_grid(.~Gender) + geom_bar(position="dodge",aes(color=Gender))
ggplot(df_1, aes(`Height (cm)`)) + facet_grid(.~Gender) + geom_bar(position="dodge",aes(color=Gender))
ggplot(df_1, aes(`Weight (kg)`)) + facet_grid(.~Gender) + geom_bar(position="dodge",aes(color=Gender))
ggplot(df_1, aes(`Income (USD)`)) + facet_grid(.~Gender) + geom_bar(position="dodge",aes(color=Gender))
# histogram
ggplot(df_1, aes(Age))+geom_histogram(aes(color=Gender))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(df_1, aes(`Height (cm)`))+geom_histogram(aes(fill = after_stat(count)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(df_1, aes(`Weight (kg)`))+geom_histogram(aes(fill = after_stat(count)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# violin plot
ggplot(df_1, aes(x=Gender, y=Age)) + geom_violin(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Height (cm)`)) + geom_violin(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Weight (kg)`)) + geom_violin(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Income (USD)`)) + geom_violin(aes(color=Gender))
# box plot
ggplot(df_1, aes(x=Gender, y=Age)) + geom_boxplot(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Height (cm)`)) + geom_boxplot(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Weight (kg)`)) + geom_boxplot(aes(color=Gender))
ggplot(df_1, aes(x=Gender, y=`Income (USD)`)) + geom_boxplot(aes(color=Gender))
# density plot and ggridges
# Scaled density of Age per gender. after_stat(scaled) replaces the
# `..scaled..` dot-dot notation, which is deprecated since ggplot2 3.4.0.
ggplot(df_1, aes(x=Age, fill=Gender, color=Gender)) + geom_density(alpha=0.3, aes(y=after_stat(scaled)))
# Ridge-line densities of Age, one ridge per gender.
ggplot(df_1, aes(x=Age, y=Gender)) + geom_density_ridges(aes(fill=Gender, color=Gender))
## Picking joint bandwidth of 1.6
# hexbin
ggplot(df_1, aes(x=Age, y=Gender)) + geom_hex(aes(color=Gender))
# with ggthemes (see also ggsci, ggthemr)
lastplot <- ggplot(df_1, aes(x=Age,y=`Income (USD)`)) + geom_point(aes(color=Gender), pch=18) +
labs(x="Age", y="Income (USD)", title="Gender Data")
lastplot + theme_bw()
lastplot + theme_cowplot()
lastplot + theme_dark()
lastplot + theme_economist()
lastplot + theme_fivethirtyeight()
lastplot + theme_tufte()
lastplot + theme_wsj()
Inference: The height and weight are highly correlated for females. Income and age are correlated, the higher the age, the more they are earning. Age and weight also have a positive correlation.
library(readr)
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
gender <- read_csv("~/Documents/Courses/Multivariate Analysis/gender.csv")
## New names:
## • `` -> `...10`
## Rows: 131 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Gender, Occupation, Education Level, Marital Status, Favorite Color
## dbl (4): Age, Height (cm), Weight (kg), Income (USD)
## lgl (1): ...10
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(gender)
## spc_tbl_ [131 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Gender : chr [1:131] "male" "male" "female" "male" ...
## $ Age : num [1:131] 32 25 41 38 29 45 27 52 31 36 ...
## $ Height (cm) : num [1:131] 175 182 160 178 165 190 163 179 168 177 ...
## $ Weight (kg) : num [1:131] 70 85 62 79 58 92 55 83 61 76 ...
## $ Occupation : chr [1:131] "Software Engineer" "Sales Representative" "Doctor" "Lawyer" ...
## $ Education Level: chr [1:131] "Master's Degree" "Bachelor's Degree" "Doctorate Degree" "Bachelor's Degree" ...
## $ Marital Status : chr [1:131] "Married" "Single" "Married" "Single" ...
## $ Income (USD) : num [1:131] 75000 45000 120000 90000 35000 110000 50000 500000 80000 95000 ...
## $ Favorite Color : chr [1:131] "Blue" "Green" "Purple" "Red" ...
## $ ...10 : logi [1:131] NA NA NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. Gender = col_character(),
## .. Age = col_double(),
## .. `Height (cm)` = col_double(),
## .. `Weight (kg)` = col_double(),
## .. Occupation = col_character(),
## .. `Education Level` = col_character(),
## .. `Marital Status` = col_character(),
## .. `Income (USD)` = col_double(),
## .. `Favorite Color` = col_character(),
## .. ...10 = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
# attach() places the columns of `gender` on the search path so they can be
# referenced by bare name.
attach(gender)
View(gender)
# Plain data.frame copy of the imported tibble.
df <- as.data.frame(gender)
View(df)
# Remove columns 5-7 and 9-10 in a single step; Gender and the four numeric
# measurements remain.
df_1 <- df[, -c(5:7, 9:10)]
View(df_1)
#dmy <- dummyVars(" ~ .", data = df_1)
#trsf <- data.frame(predict(dmy, newdata = df_1))
#View(trsf)
#Get the Correlations between the measurements
cor(df_1[-1])
## Age Height (cm) Weight (kg) Income (USD)
## Age 1.0000000 0.7263077 0.7847381 0.6622781
## Height (cm) 0.7263077 1.0000000 0.9751570 0.4562168
## Weight (kg) 0.7847381 0.9751570 1.0000000 0.4860220
## Income (USD) 0.6622781 0.4562168 0.4860220 1.0000000
# Using prcomp to compute the principal components (eigenvalues and eigenvectors). With scale=TRUE, variable means are set to zero, and variances set to one
gender_pca <- prcomp(df_1[,-1],scale=TRUE)
gender_pca
## Standard deviations (1, .., p=4):
## [1] 1.7528090 0.8250917 0.4759699 0.1426079
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## Age 0.5181109 -0.1651531 0.8309187 -0.1177267
## Height (cm) 0.5247634 0.4185942 -0.3375098 -0.6599162
## Weight (kg) 0.5386821 0.3668863 -0.1578648 0.7418185
## Income (USD) 0.4074384 -0.8141853 -0.4132068 0.0188764
summary(gender_pca)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.7528 0.8251 0.47597 0.14261
## Proportion of Variance 0.7681 0.1702 0.05664 0.00508
## Cumulative Proportion 0.7681 0.9383 0.99492 1.00000
# sample scores stored in gender_pca$x
# standard deviations of the components (square roots of eigenvalues) stored in gender_pca$sdev
# loadings (eigenvectors) are stored in gender_pca$rotation
# variable means stored in gender_pca$center
# variable standard deviations stored in gender_pca$scale
# A table containing eigenvalues and %'s accounted, follows
# Eigenvalues are sdev^2
(eigen_gender <- gender_pca$sdev^2)
## [1] 3.07233937 0.68077633 0.22654730 0.02033701
# Label every eigenvalue with its component name (PC1, PC2, ...). The labels
# are generated from the length of eigen_gender instead of a hard-coded 1:4.
names(eigen_gender) <- paste0("PC", seq_along(eigen_gender))
eigen_gender
## PC1 PC2 PC3 PC4
## 3.07233937 0.68077633 0.22654730 0.02033701
sumlambdas <- sum(eigen_gender)
sumlambdas
## [1] 4
propvar <- eigen_gender/sumlambdas
propvar
## PC1 PC2 PC3 PC4
## 0.768084842 0.170194082 0.056636825 0.005084251
cumvar_gender <- cumsum(propvar)
cumvar_gender
## PC1 PC2 PC3 PC4
## 0.7680848 0.9382789 0.9949157 1.0000000
matlambdas <- rbind(eigen_gender,propvar,cumvar_gender)
matlambdas
## PC1 PC2 PC3 PC4
## eigen_gender 3.0723394 0.6807763 0.22654730 0.020337005
## propvar 0.7680848 0.1701941 0.05663682 0.005084251
## cumvar_gender 0.7680848 0.9382789 0.99491575 1.000000000
rownames(matlambdas) <- c("Eigenvalues","Prop. variance","Cum. prop. variance")
matlambdas
## PC1 PC2 PC3 PC4
## Eigenvalues 3.0723394 0.6807763 0.22654730 0.020337005
## Prop. variance 0.7680848 0.1701941 0.05663682 0.005084251
## Cum. prop. variance 0.7680848 0.9382789 0.99491575 1.000000000
round(matlambdas,4)
## PC1 PC2 PC3 PC4
## Eigenvalues 3.0723 0.6808 0.2265 0.0203
## Prop. variance 0.7681 0.1702 0.0566 0.0051
## Cum. prop. variance 0.7681 0.9383 0.9949 1.0000
summary(gender_pca)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.7528 0.8251 0.47597 0.14261
## Proportion of Variance 0.7681 0.1702 0.05664 0.00508
## Cumulative Proportion 0.7681 0.9383 0.99492 1.00000
gender_pca$rotation
## PC1 PC2 PC3 PC4
## Age 0.5181109 -0.1651531 0.8309187 -0.1177267
## Height (cm) 0.5247634 0.4185942 -0.3375098 -0.6599162
## Weight (kg) 0.5386821 0.3668863 -0.1578648 0.7418185
## Income (USD) 0.4074384 -0.8141853 -0.4132068 0.0188764
print(gender_pca)
## Standard deviations (1, .., p=4):
## [1] 1.7528090 0.8250917 0.4759699 0.1426079
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## Age 0.5181109 -0.1651531 0.8309187 -0.1177267
## Height (cm) 0.5247634 0.4185942 -0.3375098 -0.6599162
## Weight (kg) 0.5386821 0.3668863 -0.1578648 0.7418185
## Income (USD) 0.4074384 -0.8141853 -0.4132068 0.0188764
## Sample scores stored in gender_pca$x
gender_pca$x
## PC1 PC2 PC3 PC4
## [1,] -0.26682087 0.3224077637 -0.311886867 -0.187467966
## [2,] 0.05752185 1.6447612522 -1.597225428 0.248181184
## [3,] -0.55914781 -1.4332510054 1.415657799 0.408106981
## [4,] 0.91413613 0.4090484469 0.199263017 -0.019883960
## [5,] -1.90996836 -0.0233483648 0.064090682 -0.022227752
## [6,] 2.96656117 0.9974011553 0.393872430 -0.374303465
## [7,] -2.25879441 -0.3241723896 -0.175951380 0.009032872
## [8,] 4.61777836 -4.3174964680 -0.236839228 -0.038179473
## [9,] -1.16576456 -0.3302413944 -0.072644783 -0.120215926
## [10,] 0.57550930 0.2702106127 -0.026924815 -0.073195994
## [11,] -2.72394162 -0.2414706901 -0.469753722 0.030218894
## [12,] 2.26549072 0.4058025498 0.555286835 -0.071173527
## [13,] -1.73608462 -0.1056245305 -0.253271366 0.037820314
## [14,] 0.37853423 1.2884057791 -1.071140722 0.196692895
## [15,] -0.77434838 -0.0004100307 0.154915926 -0.092825738
## [16,] 0.62701156 0.3585585278 0.184707287 0.103908489
## [17,] -1.89370880 0.3515394693 -0.504809516 -0.233931429
## [18,] 0.08392662 0.8870488849 -1.251207894 -0.388417969
## [19,] 0.36143052 1.1288586180 -0.281260440 0.242952604
## [20,] -0.30471507 -1.9619405357 1.714689487 0.122138047
## [21,] -0.40100339 0.7931682718 -0.708673398 0.019026911
## [22,] -1.44133500 -0.0984396498 0.195657041 0.235507865
## [23,] 2.30787102 0.8113220437 -0.205318300 -0.144629733
## [24,] -2.16117879 -0.4051226571 -0.244237269 0.070232985
## [25,] 4.40054684 -4.0174456964 -1.039902361 0.013431420
## [26,] -0.57369114 -0.1105804040 0.404693584 -0.130893480
## [27,] 1.16236655 0.0616292866 0.812187899 0.208701438
## [28,] -2.68124659 -0.4007194286 -0.108173183 0.154923215
## [29,] 3.07637905 0.0829875000 1.019666279 0.131372452
## [30,] -1.36238781 -0.0827303965 -0.099875673 -0.105643331
## [31,] 0.95299319 1.0128734623 -0.293905479 0.081215016
## [32,] -0.02824184 -0.0572673772 0.671966536 -0.176726699
## [33,] 0.06986612 0.4171258508 -0.634284454 0.305233677
## [34,] -1.80215351 0.0576875584 0.009659024 -0.045600297
## [35,] 0.15643836 0.3895300657 -0.495444491 0.285562480
## [36,] -1.64961729 0.0550357619 -0.488493294 -0.047904392
## [37,] 1.66989406 0.3844470810 0.345385106 0.114059209
## [38,] -1.31192572 -0.6794561869 0.964938779 -0.046882323
## [39,] 0.32618781 1.4153985371 -1.061792740 -0.145855556
## [40,] -0.41668261 -0.7151344725 -0.207813564 -0.076255280
## [41,] 4.91688158 -3.4470312485 -1.656653592 -0.028810646
## [42,] -2.63587247 -0.5429330156 0.206950801 -0.060371614
## [43,] 1.26380634 0.4945317986 0.218324996 0.114299384
## [44,] -1.51268787 -0.3810297361 0.341099229 -0.074868089
## [45,] 2.72341410 -0.1669210719 0.183015557 0.120074716
## [46,] -2.53571976 -0.2126761180 -0.176652879 0.048252662
## [47,] 0.40023357 0.4380972414 0.126634566 0.003728761
## [48,] -1.45651728 0.0058302855 -0.193825766 0.112078195
## [49,] 1.96189678 0.3569369516 0.361507521 0.132215580
## [50,] -1.25457296 -0.0016944733 -0.154307331 -0.129015877
## [51,] 0.33754592 0.7972758223 -0.146732919 0.133176280
## [52,] 0.69700952 0.5208431410 -0.229738225 0.097780442
## [53,] -1.80130935 -0.1576531147 -0.211321060 0.119843671
## [54,] 2.35056605 0.6520733052 0.156262239 -0.019925412
## [55,] -1.82996718 -0.5994919395 0.295701667 -0.065827963
## [56,] 0.68033034 0.8989796167 -1.157085592 0.045270083
## [57,] -0.45960615 -0.1931549923 0.515631279 -0.149290027
## [58,] 2.96964148 0.4811093463 0.115714931 -0.180388350
## [59,] -1.55677490 -0.1361705346 -0.184283977 -0.062599589
## [60,] 0.95045611 0.6016662973 0.021412311 0.033790853
## [61,] -2.46561689 -0.2386475823 -0.217036500 0.108178125
## [62,] 2.03871273 0.4853412634 0.497214114 -0.171353255
## [63,] -0.90340584 -0.1900776620 0.402619207 -0.065751844
## [64,] 0.33724499 0.0851175212 0.111407457 0.114223265
## [65,] -1.99301735 -0.1940581013 -0.342185920 -0.177111737
## [66,] 2.48284121 -1.1259520703 -0.448359763 0.208400238
## [67,] -1.18934823 0.0503341109 -0.196257638 -0.211039234
## [68,] 0.57825620 0.3049056228 0.405881177 0.106335187
## [69,] -1.59265656 -1.0680312816 -0.343334871 -0.035836958
## [70,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [71,] -0.89724060 -0.1654320960 0.193926669 -0.126829354
## [72,] 1.32415293 0.6245604313 0.174808004 -0.109672792
## [73,] -1.31437169 -0.0019928959 0.273992411 -0.207460488
## [74,] 3.01094451 0.4074710707 0.313492802 0.081288791
## [75,] -2.22988966 -0.4647616558 -0.040050981 -0.126665298
## [76,] 0.32511054 0.9362407677 -0.103409734 0.189277791
## [77,] -1.50652263 -0.3563841700 0.132406692 -0.135945599
## [78,] 2.21259646 0.4030650977 0.179852066 -0.111305190
## [79,] -1.13168073 0.1633275920 -0.193318074 -0.095012261
## [80,] 0.92030137 0.4336940129 -0.009429521 -0.080961469
## [81,] 0.44909383 0.3034940689 0.279522567 0.067355572
## [82,] -2.52457147 -0.4542866779 -0.009716841 0.195177481
## [83,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [84,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [85,] 1.14091412 0.3295097333 0.257335739 0.080295768
## [86,] -1.78748197 -0.3822285231 -0.090841576 -0.073230660
## [87,] 2.25881474 0.0455108376 0.273995966 -0.161156050
## [88,] -1.08769861 0.1067244680 -0.041996758 -0.173334269
## [89,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [90,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [91,] 1.77770891 0.4654830041 0.290953447 0.090686664
## [92,] -0.78942575 -0.0843961729 0.139495010 -0.150201899
## [93,] 0.73611350 0.5422400655 -0.079983595 -0.122490386
## [94,] -1.84379456 -0.3749165311 0.175222184 0.127246368
## [95,] 3.00067286 -0.8294041627 -0.527246435 0.095238861
## [96,] -0.89097046 -0.3290426074 0.359296022 -0.121853354
## [97,] 0.52173379 0.6887297697 -0.076178845 0.174705197
## [98,] -1.37702686 -0.9059594353 -0.452198188 -0.082582048
## [99,] 1.85174086 0.7651082419 0.099054704 0.061063469
## [100,] -1.46616867 -0.0264256950 0.177102754 -0.105765935
## [101,] 2.23662307 0.3404758800 0.314185780 -0.388952179
## [102,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [103,] 0.93537874 0.5176801551 0.005991395 -0.023585308
## [104,] -2.10211929 -0.3777396389 -0.077495038 0.049287138
## [105,] 0.33978208 0.4963246862 -0.203910334 0.161647428
## [106,] 0.26957429 0.7105522279 -0.537588605 0.035668457
## [107,] -2.28142902 -0.3471936348 -0.146482426 0.149707041
## [108,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [109,] -0.77424347 -0.1886661081 0.528977818 -0.026772229
## [110,] 1.02195098 0.4900843700 0.144831358 -0.043256505
## [111,] -1.77856984 -0.3228879469 0.133271878 0.045223011
## [112,] 2.25742274 0.1311213006 0.110193298 -0.024183230
## [113,] -2.30883686 -0.4804709091 0.255481734 0.214485899
## [114,] 0.36252159 0.3310898540 0.140682603 0.087026768
## [115,] -1.47636789 -0.1884118857 0.163248524 -0.021193276
## [116,] 1.93079297 0.5925614177 0.177583881 -0.214034219
## [117,] -0.93983072 -0.1944394350 0.206408021 -0.185480165
## [118,] 0.62690665 0.5468146053 -0.189354604 0.037854980
## [119,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [120,] 2.59194315 -0.9422705326 -0.713050645 -0.017998636
## [121,] -0.99878531 -0.4100785306 0.413727681 -0.098480809
## [122,] 0.97573271 0.8476386301 0.050687458 0.006594356
## [123,] -1.48484171 -0.9869953585 -0.397766529 -0.059209503
## [124,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [125,] -1.35835382 0.0546102281 0.122671096 -0.129138480
## [126,] 2.21398846 0.3174546347 0.343654734 -0.248278010
## [127,] -0.93102350 0.0531572187 0.056459584 -0.133080003
## [128,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [129,] -2.29650639 -0.4311797770 -0.161903342 0.092330880
## [130,] 0.14539498 0.4428845481 -0.288318639 0.204691170
## [131,] -1.85270670 -0.4342571073 -0.048891270 0.008792697
# Identifying the scores by their gender
# Gender is taken explicitly from df_1 (same rows, same order as the PCA input)
# instead of relying on a bare `Gender` made visible by attach(gender).
gendertyp_pca <- cbind(data.frame(Gender = df_1$Gender), gender_pca$x)
View(gendertyp_pca)
# Means of scores for all the PC's classified by Gender
tabmeansPC <- aggregate(gendertyp_pca[, 2:5], by = list(Gender = df_1$Gender), mean)
View(tabmeansPC)
# Reverse the alphabetical group order so that the male row comes first.
tabmeansPC <- tabmeansPC[rev(order(tabmeansPC$Gender)), ]
View(tabmeansPC)
# Transpose so that components are rows and genders are columns.
tabfmeans <- t(tabmeansPC[, -1])
tabfmeans
## 2 1
## PC1 1.41258066 -1.52469023
## PC2 0.27739465 -0.29941010
## PC3 -0.06912396 0.07460999
## PC4 0.02583925 -0.02788998
colnames(tabfmeans) <- t(as.vector(tabmeansPC[1]$Gender))
tabfmeans
## male female
## PC1 1.41258066 -1.52469023
## PC2 0.27739465 -0.29941010
## PC3 -0.06912396 0.07460999
## PC4 0.02583925 -0.02788998
# Standard deviations of scores for all the PC's classified by Gender
tabsdsPC <- aggregate(gendertyp_pca[,2:5],by=list(Gender=df_1$Gender),sd)
tabfsds <- t(tabsdsPC[,-1])
colnames(tabfsds) <- t(as.vector(tabsdsPC[1]$Gender))
tabfsds
## female male
## PC1 0.6425161 1.1696910
## PC2 0.3725670 1.0149242
## PC3 0.3973466 0.5323380
## PC4 0.1252452 0.1534115
t.test(PC1~df_1$Gender,data=gendertyp_pca)
##
## Welch Two Sample t-test
##
## data: PC1 by df_1$Gender
## t = -17.985, df = 105.64, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -3.261079 -2.613463
## sample estimates:
## mean in group female mean in group male
## -1.524690 1.412581
t.test(PC2~df_1$Gender,data=gendertyp_pca)
##
## Welch Two Sample t-test
##
## data: PC2 by df_1$Gender
## t = -4.3789, df = 85.943, p-value = 3.347e-05
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.8386671 -0.3149424
## sample estimates:
## mean in group female mean in group male
## -0.2994101 0.2773947
t.test(PC3~df_1$Gender,data=gendertyp_pca)
##
## Welch Two Sample t-test
##
## data: PC3 by df_1$Gender
## t = 1.7595, df = 123.53, p-value = 0.08097
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.0179625 0.3054304
## sample estimates:
## mean in group female mean in group male
## 0.07460999 -0.06912396
t.test(PC4~df_1$Gender,data=gendertyp_pca)
##
## Welch Two Sample t-test
##
## data: PC4 by df_1$Gender
## t = -2.2025, df = 127.03, p-value = 0.02943
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.102001531 -0.005456937
## sample estimates:
## mean in group female mean in group male
## -0.02788998 0.02583925
## F ratio tests
var.test(PC1~df_1$Gender,data=gendertyp_pca)
##
## F test to compare two variances
##
## data: PC1 by df_1$Gender
## F = 0.30174, num df = 62, denom df = 67, p-value = 3.82e-06
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1847713 0.4950895
## sample estimates:
## ratio of variances
## 0.301735
var.test(PC2~df_1$Gender,data=gendertyp_pca)
##
## F test to compare two variances
##
## data: PC2 by df_1$Gender
## F = 0.13475, num df = 62, denom df = 67, p-value = 1.573e-13
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.0825183 0.2211055
## sample estimates:
## ratio of variances
## 0.134754
var.test(PC3~df_1$Gender,data=gendertyp_pca)
##
## F test to compare two variances
##
## data: PC3 by df_1$Gender
## F = 0.55714, num df = 62, denom df = 67, p-value = 0.02089
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3411717 0.9141600
## sample estimates:
## ratio of variances
## 0.5571398
var.test(PC4~df_1$Gender,data=gendertyp_pca)
##
## F test to compare two variances
##
## data: PC4 by df_1$Gender
## F = 0.66651, num df = 62, denom df = 67, p-value = 0.1077
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4081457 1.0936149
## sample estimates:
## ratio of variances
## 0.6665096
# Levene's tests (one-sided)
library(car)
(LTPC1 <- leveneTest(PC1~df_1$Gender,data=gendertyp_pca))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 14.476 0.0002182 ***
## 129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(p_PC1_1sided <- LTPC1[[3]][1]/2)
## [1] 0.0001090988
(LTPC2 <- leveneTest(PC2~df_1$Gender,data=gendertyp_pca))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 3.3981 0.06756 .
## 129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(p_PC2_1sided=LTPC2[[3]][1]/2)
## [1] 0.03378249
(LTPC3 <- leveneTest(PC3~df_1$Gender,data=gendertyp_pca))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.8534 0.0936 .
## 129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(p_PC3_1sided <- LTPC3[[3]][1]/2)
## [1] 0.04679924
(LTPC4 <- leveneTest(PC4~df_1$Gender,data=gendertyp_pca))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1.7025 0.1943
## 129
(p_PC4_1sided <- LTPC4[[3]][1]/2)
## [1] 0.09713891
# Plotting the scores for the first and second components
# Fix: Gender is coded "male"/"female" in gendertyp_pca, so the symbol test
# compares against "male". The earlier comparison with "M" was never TRUE,
# which drew every point with pch 16 and made the legend misleading.
plot(gendertyp_pca$PC1, gendertyp_pca$PC2, pch = ifelse(gendertyp_pca$Gender == "male", 1, 16), xlab = "PC1", ylab = "PC2", main = "131 people against values for PC1 & PC2")
# Reference lines through the origin of the score plot.
abline(h = 0)
abline(v = 0)
legend("bottomleft", legend = c("Male", "Female"), pch = c(1, 16))
# Scree diagram of the eigenvalues, on the raw and on the log scale.
plot(eigen_gender, xlab = "Component number", ylab = "Component variance", type = "l", main = "Scree diagram")
plot(log(eigen_gender), xlab = "Component number", ylab = "log(Component variance)", type = "l", main = "Log(eigenvalue) diagram")
print(summary(gender_pca))
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.7528 0.8251 0.47597 0.14261
## Proportion of Variance 0.7681 0.1702 0.05664 0.00508
## Cumulative Proportion 0.7681 0.9383 0.99492 1.00000
diag(cov(gender_pca$x))
## PC1 PC2 PC3 PC4
## 3.07233937 0.68077633 0.22654730 0.02033701
xlim <- range(gender_pca$x[,1])
xlim
## [1] -2.723942 4.916882
gender_pca$x[,1]
## [1] -0.26682087 0.05752185 -0.55914781 0.91413613 -1.90996836 2.96656117
## [7] -2.25879441 4.61777836 -1.16576456 0.57550930 -2.72394162 2.26549072
## [13] -1.73608462 0.37853423 -0.77434838 0.62701156 -1.89370880 0.08392662
## [19] 0.36143052 -0.30471507 -0.40100339 -1.44133500 2.30787102 -2.16117879
## [25] 4.40054684 -0.57369114 1.16236655 -2.68124659 3.07637905 -1.36238781
## [31] 0.95299319 -0.02824184 0.06986612 -1.80215351 0.15643836 -1.64961729
## [37] 1.66989406 -1.31192572 0.32618781 -0.41668261 4.91688158 -2.63587247
## [43] 1.26380634 -1.51268787 2.72341410 -2.53571976 0.40023357 -1.45651728
## [49] 1.96189678 -1.25457296 0.33754592 0.69700952 -1.80130935 2.35056605
## [55] -1.82996718 0.68033034 -0.45960615 2.96964148 -1.55677490 0.95045611
## [61] -2.46561689 2.03871273 -0.90340584 0.33724499 -1.99301735 2.48284121
## [67] -1.18934823 0.57825620 -1.59265656 1.78651613 -0.89724060 1.32415293
## [73] -1.31437169 3.01094451 -2.22988966 0.32511054 -1.50652263 2.21259646
## [79] -1.13168073 0.92030137 0.44909383 -2.52457147 2.24285611 -1.10406307
## [85] 1.14091412 -1.78748197 2.25881474 -1.08769861 0.78368668 -2.23744690
## [91] 1.77770891 -0.78942575 0.73611350 -1.84379456 3.00067286 -0.89097046
## [97] 0.52173379 -1.37702686 1.85174086 -1.46616867 2.23662307 -1.10406307
## [103] 0.93537874 -2.10211929 0.33978208 0.26957429 -2.28142902 2.24285611
## [109] -0.77424347 1.02195098 -1.77856984 2.25742274 -2.30883686 0.36252159
## [115] -1.47636789 1.93079297 -0.93983072 0.62690665 -2.23744690 2.59194315
## [121] -0.99878531 0.97573271 -1.48484171 1.78651613 -1.35835382 2.21398846
## [127] -0.93102350 0.78368668 -2.29650639 0.14539498 -1.85270670
# Full score matrix: one row per individual, one column per component.
gender_pca$x
## PC1 PC2 PC3 PC4
## [1,] -0.26682087 0.3224077637 -0.311886867 -0.187467966
## [2,] 0.05752185 1.6447612522 -1.597225428 0.248181184
## [3,] -0.55914781 -1.4332510054 1.415657799 0.408106981
## [4,] 0.91413613 0.4090484469 0.199263017 -0.019883960
## [5,] -1.90996836 -0.0233483648 0.064090682 -0.022227752
## [6,] 2.96656117 0.9974011553 0.393872430 -0.374303465
## [7,] -2.25879441 -0.3241723896 -0.175951380 0.009032872
## [8,] 4.61777836 -4.3174964680 -0.236839228 -0.038179473
## [9,] -1.16576456 -0.3302413944 -0.072644783 -0.120215926
## [10,] 0.57550930 0.2702106127 -0.026924815 -0.073195994
## [11,] -2.72394162 -0.2414706901 -0.469753722 0.030218894
## [12,] 2.26549072 0.4058025498 0.555286835 -0.071173527
## [13,] -1.73608462 -0.1056245305 -0.253271366 0.037820314
## [14,] 0.37853423 1.2884057791 -1.071140722 0.196692895
## [15,] -0.77434838 -0.0004100307 0.154915926 -0.092825738
## [16,] 0.62701156 0.3585585278 0.184707287 0.103908489
## [17,] -1.89370880 0.3515394693 -0.504809516 -0.233931429
## [18,] 0.08392662 0.8870488849 -1.251207894 -0.388417969
## [19,] 0.36143052 1.1288586180 -0.281260440 0.242952604
## [20,] -0.30471507 -1.9619405357 1.714689487 0.122138047
## [21,] -0.40100339 0.7931682718 -0.708673398 0.019026911
## [22,] -1.44133500 -0.0984396498 0.195657041 0.235507865
## [23,] 2.30787102 0.8113220437 -0.205318300 -0.144629733
## [24,] -2.16117879 -0.4051226571 -0.244237269 0.070232985
## [25,] 4.40054684 -4.0174456964 -1.039902361 0.013431420
## [26,] -0.57369114 -0.1105804040 0.404693584 -0.130893480
## [27,] 1.16236655 0.0616292866 0.812187899 0.208701438
## [28,] -2.68124659 -0.4007194286 -0.108173183 0.154923215
## [29,] 3.07637905 0.0829875000 1.019666279 0.131372452
## [30,] -1.36238781 -0.0827303965 -0.099875673 -0.105643331
## [31,] 0.95299319 1.0128734623 -0.293905479 0.081215016
## [32,] -0.02824184 -0.0572673772 0.671966536 -0.176726699
## [33,] 0.06986612 0.4171258508 -0.634284454 0.305233677
## [34,] -1.80215351 0.0576875584 0.009659024 -0.045600297
## [35,] 0.15643836 0.3895300657 -0.495444491 0.285562480
## [36,] -1.64961729 0.0550357619 -0.488493294 -0.047904392
## [37,] 1.66989406 0.3844470810 0.345385106 0.114059209
## [38,] -1.31192572 -0.6794561869 0.964938779 -0.046882323
## [39,] 0.32618781 1.4153985371 -1.061792740 -0.145855556
## [40,] -0.41668261 -0.7151344725 -0.207813564 -0.076255280
## [41,] 4.91688158 -3.4470312485 -1.656653592 -0.028810646
## [42,] -2.63587247 -0.5429330156 0.206950801 -0.060371614
## [43,] 1.26380634 0.4945317986 0.218324996 0.114299384
## [44,] -1.51268787 -0.3810297361 0.341099229 -0.074868089
## [45,] 2.72341410 -0.1669210719 0.183015557 0.120074716
## [46,] -2.53571976 -0.2126761180 -0.176652879 0.048252662
## [47,] 0.40023357 0.4380972414 0.126634566 0.003728761
## [48,] -1.45651728 0.0058302855 -0.193825766 0.112078195
## [49,] 1.96189678 0.3569369516 0.361507521 0.132215580
## [50,] -1.25457296 -0.0016944733 -0.154307331 -0.129015877
## [51,] 0.33754592 0.7972758223 -0.146732919 0.133176280
## [52,] 0.69700952 0.5208431410 -0.229738225 0.097780442
## [53,] -1.80130935 -0.1576531147 -0.211321060 0.119843671
## [54,] 2.35056605 0.6520733052 0.156262239 -0.019925412
## [55,] -1.82996718 -0.5994919395 0.295701667 -0.065827963
## [56,] 0.68033034 0.8989796167 -1.157085592 0.045270083
## [57,] -0.45960615 -0.1931549923 0.515631279 -0.149290027
## [58,] 2.96964148 0.4811093463 0.115714931 -0.180388350
## [59,] -1.55677490 -0.1361705346 -0.184283977 -0.062599589
## [60,] 0.95045611 0.6016662973 0.021412311 0.033790853
## [61,] -2.46561689 -0.2386475823 -0.217036500 0.108178125
## [62,] 2.03871273 0.4853412634 0.497214114 -0.171353255
## [63,] -0.90340584 -0.1900776620 0.402619207 -0.065751844
## [64,] 0.33724499 0.0851175212 0.111407457 0.114223265
## [65,] -1.99301735 -0.1940581013 -0.342185920 -0.177111737
## [66,] 2.48284121 -1.1259520703 -0.448359763 0.208400238
## [67,] -1.18934823 0.0503341109 -0.196257638 -0.211039234
## [68,] 0.57825620 0.3049056228 0.405881177 0.106335187
## [69,] -1.59265656 -1.0680312816 -0.343334871 -0.035836958
## [70,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [71,] -0.89724060 -0.1654320960 0.193926669 -0.126829354
## [72,] 1.32415293 0.6245604313 0.174808004 -0.109672792
## [73,] -1.31437169 -0.0019928959 0.273992411 -0.207460488
## [74,] 3.01094451 0.4074710707 0.313492802 0.081288791
## [75,] -2.22988966 -0.4647616558 -0.040050981 -0.126665298
## [76,] 0.32511054 0.9362407677 -0.103409734 0.189277791
## [77,] -1.50652263 -0.3563841700 0.132406692 -0.135945599
## [78,] 2.21259646 0.4030650977 0.179852066 -0.111305190
## [79,] -1.13168073 0.1633275920 -0.193318074 -0.095012261
## [80,] 0.92030137 0.4336940129 -0.009429521 -0.080961469
## [81,] 0.44909383 0.3034940689 0.279522567 0.067355572
## [82,] -2.52457147 -0.4542866779 -0.009716841 0.195177481
## [83,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [84,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [85,] 1.14091412 0.3295097333 0.257335739 0.080295768
## [86,] -1.78748197 -0.3822285231 -0.090841576 -0.073230660
## [87,] 2.25881474 0.0455108376 0.273995966 -0.161156050
## [88,] -1.08769861 0.1067244680 -0.041996758 -0.173334269
## [89,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [90,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [91,] 1.77770891 0.4654830041 0.290953447 0.090686664
## [92,] -0.78942575 -0.0843961729 0.139495010 -0.150201899
## [93,] 0.73611350 0.5422400655 -0.079983595 -0.122490386
## [94,] -1.84379456 -0.3749165311 0.175222184 0.127246368
## [95,] 3.00067286 -0.8294041627 -0.527246435 0.095238861
## [96,] -0.89097046 -0.3290426074 0.359296022 -0.121853354
## [97,] 0.52173379 0.6887297697 -0.076178845 0.174705197
## [98,] -1.37702686 -0.9059594353 -0.452198188 -0.082582048
## [99,] 1.85174086 0.7651082419 0.099054704 0.061063469
## [100,] -1.46616867 -0.0264256950 0.177102754 -0.105765935
## [101,] 2.23662307 0.3404758800 0.314185780 -0.388952179
## [102,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [103,] 0.93537874 0.5176801551 0.005991395 -0.023585308
## [104,] -2.10211929 -0.3777396389 -0.077495038 0.049287138
## [105,] 0.33978208 0.4963246862 -0.203910334 0.161647428
## [106,] 0.26957429 0.7105522279 -0.537588605 0.035668457
## [107,] -2.28142902 -0.3471936348 -0.146482426 0.149707041
## [108,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [109,] -0.77424347 -0.1886661081 0.528977818 -0.026772229
## [110,] 1.02195098 0.4900843700 0.144831358 -0.043256505
## [111,] -1.77856984 -0.3228879469 0.133271878 0.045223011
## [112,] 2.25742274 0.1311213006 0.110193298 -0.024183230
## [113,] -2.30883686 -0.4804709091 0.255481734 0.214485899
## [114,] 0.36252159 0.3310898540 0.140682603 0.087026768
## [115,] -1.47636789 -0.1884118857 0.163248524 -0.021193276
## [116,] 1.93079297 0.5925614177 0.177583881 -0.214034219
## [117,] -0.93983072 -0.1944394350 0.206408021 -0.185480165
## [118,] 0.62690665 0.5468146053 -0.189354604 0.037854980
## [119,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [120,] 2.59194315 -0.9422705326 -0.713050645 -0.017998636
## [121,] -0.99878531 -0.4100785306 0.413727681 -0.098480809
## [122,] 0.97573271 0.8476386301 0.050687458 0.006594356
## [123,] -1.48484171 -0.9869953585 -0.397766529 -0.059209503
## [124,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [125,] -1.35835382 0.0546102281 0.122671096 -0.129138480
## [126,] 2.21398846 0.3174546347 0.343654734 -0.248278010
## [127,] -0.93102350 0.0531572187 0.056459584 -0.133080003
## [128,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [129,] -2.29650639 -0.4311797770 -0.161903342 0.092330880
## [130,] 0.14539498 0.4428845481 -0.288318639 0.204691170
## [131,] -1.85270670 -0.4342571073 -0.048891270 0.008792697
# Scatter of the first two score columns, with both axes set to the PC1 range
# so the two components are drawn on the same scale.
plot(gender_pca$x, xlim = xlim, ylim = xlim)
# Loadings of the original variables on PC1.
gender_pca$rotation[, "PC1"]
## Age Height (cm) Weight (kg) Income (USD)
## 0.5181109 0.5247634 0.5386821 0.4074384
# Loadings matrix: rows are the original variables, columns the components.
gender_pca$rotation
## PC1 PC2 PC3 PC4
## Age 0.5181109 -0.1651531 0.8309187 -0.1177267
## Height (cm) 0.5247634 0.4185942 -0.3375098 -0.6599162
## Weight (kg) 0.5386821 0.3668863 -0.1578648 0.7418185
## Income (USD) 0.4074384 -0.8141853 -0.4132068 0.0188764
# Scatter-plot matrix of the data with its first column (Gender) dropped.
plot(df_1[,-1])
# Print the score matrix again for reference.
gender_pca$x
## PC1 PC2 PC3 PC4
## [1,] -0.26682087 0.3224077637 -0.311886867 -0.187467966
## [2,] 0.05752185 1.6447612522 -1.597225428 0.248181184
## [3,] -0.55914781 -1.4332510054 1.415657799 0.408106981
## [4,] 0.91413613 0.4090484469 0.199263017 -0.019883960
## [5,] -1.90996836 -0.0233483648 0.064090682 -0.022227752
## [6,] 2.96656117 0.9974011553 0.393872430 -0.374303465
## [7,] -2.25879441 -0.3241723896 -0.175951380 0.009032872
## [8,] 4.61777836 -4.3174964680 -0.236839228 -0.038179473
## [9,] -1.16576456 -0.3302413944 -0.072644783 -0.120215926
## [10,] 0.57550930 0.2702106127 -0.026924815 -0.073195994
## [11,] -2.72394162 -0.2414706901 -0.469753722 0.030218894
## [12,] 2.26549072 0.4058025498 0.555286835 -0.071173527
## [13,] -1.73608462 -0.1056245305 -0.253271366 0.037820314
## [14,] 0.37853423 1.2884057791 -1.071140722 0.196692895
## [15,] -0.77434838 -0.0004100307 0.154915926 -0.092825738
## [16,] 0.62701156 0.3585585278 0.184707287 0.103908489
## [17,] -1.89370880 0.3515394693 -0.504809516 -0.233931429
## [18,] 0.08392662 0.8870488849 -1.251207894 -0.388417969
## [19,] 0.36143052 1.1288586180 -0.281260440 0.242952604
## [20,] -0.30471507 -1.9619405357 1.714689487 0.122138047
## [21,] -0.40100339 0.7931682718 -0.708673398 0.019026911
## [22,] -1.44133500 -0.0984396498 0.195657041 0.235507865
## [23,] 2.30787102 0.8113220437 -0.205318300 -0.144629733
## [24,] -2.16117879 -0.4051226571 -0.244237269 0.070232985
## [25,] 4.40054684 -4.0174456964 -1.039902361 0.013431420
## [26,] -0.57369114 -0.1105804040 0.404693584 -0.130893480
## [27,] 1.16236655 0.0616292866 0.812187899 0.208701438
## [28,] -2.68124659 -0.4007194286 -0.108173183 0.154923215
## [29,] 3.07637905 0.0829875000 1.019666279 0.131372452
## [30,] -1.36238781 -0.0827303965 -0.099875673 -0.105643331
## [31,] 0.95299319 1.0128734623 -0.293905479 0.081215016
## [32,] -0.02824184 -0.0572673772 0.671966536 -0.176726699
## [33,] 0.06986612 0.4171258508 -0.634284454 0.305233677
## [34,] -1.80215351 0.0576875584 0.009659024 -0.045600297
## [35,] 0.15643836 0.3895300657 -0.495444491 0.285562480
## [36,] -1.64961729 0.0550357619 -0.488493294 -0.047904392
## [37,] 1.66989406 0.3844470810 0.345385106 0.114059209
## [38,] -1.31192572 -0.6794561869 0.964938779 -0.046882323
## [39,] 0.32618781 1.4153985371 -1.061792740 -0.145855556
## [40,] -0.41668261 -0.7151344725 -0.207813564 -0.076255280
## [41,] 4.91688158 -3.4470312485 -1.656653592 -0.028810646
## [42,] -2.63587247 -0.5429330156 0.206950801 -0.060371614
## [43,] 1.26380634 0.4945317986 0.218324996 0.114299384
## [44,] -1.51268787 -0.3810297361 0.341099229 -0.074868089
## [45,] 2.72341410 -0.1669210719 0.183015557 0.120074716
## [46,] -2.53571976 -0.2126761180 -0.176652879 0.048252662
## [47,] 0.40023357 0.4380972414 0.126634566 0.003728761
## [48,] -1.45651728 0.0058302855 -0.193825766 0.112078195
## [49,] 1.96189678 0.3569369516 0.361507521 0.132215580
## [50,] -1.25457296 -0.0016944733 -0.154307331 -0.129015877
## [51,] 0.33754592 0.7972758223 -0.146732919 0.133176280
## [52,] 0.69700952 0.5208431410 -0.229738225 0.097780442
## [53,] -1.80130935 -0.1576531147 -0.211321060 0.119843671
## [54,] 2.35056605 0.6520733052 0.156262239 -0.019925412
## [55,] -1.82996718 -0.5994919395 0.295701667 -0.065827963
## [56,] 0.68033034 0.8989796167 -1.157085592 0.045270083
## [57,] -0.45960615 -0.1931549923 0.515631279 -0.149290027
## [58,] 2.96964148 0.4811093463 0.115714931 -0.180388350
## [59,] -1.55677490 -0.1361705346 -0.184283977 -0.062599589
## [60,] 0.95045611 0.6016662973 0.021412311 0.033790853
## [61,] -2.46561689 -0.2386475823 -0.217036500 0.108178125
## [62,] 2.03871273 0.4853412634 0.497214114 -0.171353255
## [63,] -0.90340584 -0.1900776620 0.402619207 -0.065751844
## [64,] 0.33724499 0.0851175212 0.111407457 0.114223265
## [65,] -1.99301735 -0.1940581013 -0.342185920 -0.177111737
## [66,] 2.48284121 -1.1259520703 -0.448359763 0.208400238
## [67,] -1.18934823 0.0503341109 -0.196257638 -0.211039234
## [68,] 0.57825620 0.3049056228 0.405881177 0.106335187
## [69,] -1.59265656 -1.0680312816 -0.343334871 -0.035836958
## [70,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [71,] -0.89724060 -0.1654320960 0.193926669 -0.126829354
## [72,] 1.32415293 0.6245604313 0.174808004 -0.109672792
## [73,] -1.31437169 -0.0019928959 0.273992411 -0.207460488
## [74,] 3.01094451 0.4074710707 0.313492802 0.081288791
## [75,] -2.22988966 -0.4647616558 -0.040050981 -0.126665298
## [76,] 0.32511054 0.9362407677 -0.103409734 0.189277791
## [77,] -1.50652263 -0.3563841700 0.132406692 -0.135945599
## [78,] 2.21259646 0.4030650977 0.179852066 -0.111305190
## [79,] -1.13168073 0.1633275920 -0.193318074 -0.095012261
## [80,] 0.92030137 0.4336940129 -0.009429521 -0.080961469
## [81,] 0.44909383 0.3034940689 0.279522567 0.067355572
## [82,] -2.52457147 -0.4542866779 -0.009716841 0.195177481
## [83,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [84,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [85,] 1.14091412 0.3295097333 0.257335739 0.080295768
## [86,] -1.78748197 -0.3822285231 -0.090841576 -0.073230660
## [87,] 2.25881474 0.0455108376 0.273995966 -0.161156050
## [88,] -1.08769861 0.1067244680 -0.041996758 -0.173334269
## [89,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [90,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [91,] 1.77770891 0.4654830041 0.290953447 0.090686664
## [92,] -0.78942575 -0.0843961729 0.139495010 -0.150201899
## [93,] 0.73611350 0.5422400655 -0.079983595 -0.122490386
## [94,] -1.84379456 -0.3749165311 0.175222184 0.127246368
## [95,] 3.00067286 -0.8294041627 -0.527246435 0.095238861
## [96,] -0.89097046 -0.3290426074 0.359296022 -0.121853354
## [97,] 0.52173379 0.6887297697 -0.076178845 0.174705197
## [98,] -1.37702686 -0.9059594353 -0.452198188 -0.082582048
## [99,] 1.85174086 0.7651082419 0.099054704 0.061063469
## [100,] -1.46616867 -0.0264256950 0.177102754 -0.105765935
## [101,] 2.23662307 0.3404758800 0.314185780 -0.388952179
## [102,] -1.10406307 -0.0799072887 0.152841549 -0.027684101
## [103,] 0.93537874 0.5176801551 0.005991395 -0.023585308
## [104,] -2.10211929 -0.3777396389 -0.077495038 0.049287138
## [105,] 0.33978208 0.4963246862 -0.203910334 0.161647428
## [106,] 0.26957429 0.7105522279 -0.537588605 0.035668457
## [107,] -2.28142902 -0.3471936348 -0.146482426 0.149707041
## [108,] 2.24285611 0.3827813046 0.584755789 0.069500642
## [109,] -0.77424347 -0.1886661081 0.528977818 -0.026772229
## [110,] 1.02195098 0.4900843700 0.144831358 -0.043256505
## [111,] -1.77856984 -0.3228879469 0.133271878 0.045223011
## [112,] 2.25742274 0.1311213006 0.110193298 -0.024183230
## [113,] -2.30883686 -0.4804709091 0.255481734 0.214485899
## [114,] 0.36252159 0.3310898540 0.140682603 0.087026768
## [115,] -1.47636789 -0.1884118857 0.163248524 -0.021193276
## [116,] 1.93079297 0.5925614177 0.177583881 -0.214034219
## [117,] -0.93983072 -0.1944394350 0.206408021 -0.185480165
## [118,] 0.62690665 0.5468146053 -0.189354604 0.037854980
## [119,] -2.23744690 -0.4037967588 0.004838889 0.071385032
## [120,] 2.59194315 -0.9422705326 -0.713050645 -0.017998636
## [121,] -0.99878531 -0.4100785306 0.413727681 -0.098480809
## [122,] 0.97573271 0.8476386301 0.050687458 0.006594356
## [123,] -1.48484171 -0.9869953585 -0.397766529 -0.059209503
## [124,] 1.78651613 0.7130796577 0.141005010 0.143086826
## [125,] -1.35835382 0.0546102281 0.122671096 -0.129138480
## [126,] 2.21398846 0.3174546347 0.343654734 -0.248278010
## [127,] -0.93102350 0.0531572187 0.056459584 -0.133080003
## [128,] 0.78368668 0.3049912785 0.283163630 0.144162754
## [129,] -2.29650639 -0.4311797770 -0.161903342 0.092330880
## [130,] 0.14539498 0.4428845481 -0.288318639 0.204691170
## [131,] -1.85270670 -0.4342571073 -0.048891270 0.008792697
# Default plot of the PCA object.
plot(gender_pca)
# Recompute the PC1 scores by hand from the raw data, using the centring and
# scaling vectors stored in the PCA object.
center <- gender_pca$center
# NOTE: this variable shares its name with base::scale(); the later call
# scale(new_gender, ...) still reaches the function because R skips
# non-function objects when resolving a call.
scale <- gender_pca$scale
# Numeric data as a matrix, with the first column (Gender) dropped.
new_gender <- as.matrix(df_1[,-1])
new_gender
## Age Height (cm) Weight (kg) Income (USD)
## [1,] 32 175 70 75000
## [2,] 25 182 85 45000
## [3,] 41 160 62 120000
## [4,] 38 178 79 90000
## [5,] 29 165 58 35000
## [6,] 45 190 92 110000
## [7,] 27 163 55 50000
## [8,] 52 179 83 500000
## [9,] 31 168 61 80000
## [10,] 36 177 76 95000
## [11,] 24 162 53 40000
## [12,] 44 183 87 120000
## [13,] 28 166 60 55000
## [14,] 29 181 84 60000
## [15,] 33 170 65 65000
## [16,] 37 176 78 85000
## [17,] 26 169 59 30000
## [18,] 28 182 75 80000
## [19,] 33 178 82 45000
## [20,] 44 160 58 150000
## [21,] 29 176 74 55000
## [22,] 31 165 63 50000
## [23,] 40 187 90 120000
## [24,] 27 163 56 60000
## [25,] 47 181 85 500000
## [26,] 35 170 65 70000
## [27,] 42 175 80 100000
## [28,] 26 160 53 40000
## [29,] 49 183 92 150000
## [30,] 30 168 61 60000
## [31,] 35 181 84 70000
## [32,] 38 172 68 75000
## [33,] 31 175 78 90000
## [34,] 29 166 59 35000
## [35,] 32 175 78 90000
## [36,] 27 168 61 55000
## [37,] 41 180 85 110000
## [38,] 36 163 57 65000
## [39,] 29 183 81 50000
## [40,] 33 170 65 130000
## [41,] 45 187 92 500000
## [42,] 28 160 50 40000
## [43,] 39 179 83 95000
## [44,] 32 165 58 60000
## [45,] 44 183 90 180000
## [46,] 26 162 54 35000
## [47,] 36 176 76 75000
## [48,] 29 167 63 55000
## [49,] 42 181 87 120000
## [50,] 30 169 62 60000
## [51,] 34 177 79 60000
## [52,] 35 178 80 90000
## [53,] 28 165 60 55000
## [54,] 42 185 90 120000
## [55,] 31 163 55 65000
## [56,] 30 182 83 95000
## [57,] 36 170 65 75000
## [58,] 44 188 92 150000
## [59,] 29 167 60 60000
## [60,] 37 179 81 85000
## [61,] 26 162 55 40000
## [62,] 43 183 85 110000
## [63,] 34 168 63 65000
## [64,] 36 174 75 95000
## [65,] 27 166 56 55000
## [66,] 41 180 86 250000
## [67,] 30 170 62 60000
## [68,] 38 175 77 80000
## [69,] 29 164 56 120000
## [70,] 40 182 88 100000
## [71,] 33 169 63 70000
## [72,] 39 181 82 90000
## [73,] 32 168 60 45000
## [74,] 45 186 94 150000
## [75,] 28 163 53 55000
## [76,] 34 177 80 50000
## [77,] 31 166 58 65000
## [78,] 42 184 87 130000
## [79,] 30 170 64 55000
## [80,] 37 179 79 95000
## [81,] 37 175 76 80000
## [82,] 27 160 54 45000
## [83,] 44 182 88 120000
## [84,] 32 168 63 60000
## [85,] 39 178 81 100000
## [86,] 29 165 57 65000
## [87,] 43 183 85 150000
## [88,] 31 170 63 55000
## [89,] 38 176 79 90000
## [90,] 28 162 55 50000
## [91,] 41 181 86 110000
## [92,] 33 170 64 70000
## [93,] 36 179 78 85000
## [94,] 30 163 58 55000
## [95,] 42 184 90 250000
## [96,] 34 168 62 75000
## [97,] 35 177 80 70000
## [98,] 29 166 58 120000
## [99,] 40 183 88 100000
## [100,] 31 167 60 45000
## [101,] 43 185 84 130000
## [102,] 32 168 63 60000
## [103,] 37 179 80 90000
## [104,] 28 163 56 55000
## [105,] 34 176 78 80000
## [106,] 32 178 78 75000
## [107,] 27 162 56 50000
## [108,] 44 182 88 120000
## [109,] 35 168 64 65000
## [110,] 38 179 80 90000
## [111,] 30 164 58 55000
## [112,] 42 183 87 150000
## [113,] 29 160 55 45000
## [114,] 36 175 76 80000
## [115,] 31 166 60 55000
## [116,] 41 184 85 110000
## [117,] 33 169 62 70000
## [118,] 35 178 79 85000
## [119,] 28 162 55 50000
## [120,] 40 183 86 250000
## [121,] 34 167 61 75000
## [122,] 37 180 82 70000
## [123,] 29 165 57 120000
## [124,] 40 182 88 100000
## [125,] 31 168 61 45000
## [126,] 43 184 85 130000
## [127,] 32 170 64 60000
## [128,] 38 176 79 90000
## [129,] 27 162 55 55000
## [130,] 33 175 77 80000
## [131,] 29 164 57 65000
# Standardise the raw data with the stored centre/scale, project it onto the
# PC1 loadings, and drop the resulting one-column matrix to a plain vector.
drop(
  scale(new_gender, center = center, scale = scale) %*%
    gender_pca$rotation[, 1]
)
## [1] -0.26682087 0.05752185 -0.55914781 0.91413613 -1.90996836 2.96656117
## [7] -2.25879441 4.61777836 -1.16576456 0.57550930 -2.72394162 2.26549072
## [13] -1.73608462 0.37853423 -0.77434838 0.62701156 -1.89370880 0.08392662
## [19] 0.36143052 -0.30471507 -0.40100339 -1.44133500 2.30787102 -2.16117879
## [25] 4.40054684 -0.57369114 1.16236655 -2.68124659 3.07637905 -1.36238781
## [31] 0.95299319 -0.02824184 0.06986612 -1.80215351 0.15643836 -1.64961729
## [37] 1.66989406 -1.31192572 0.32618781 -0.41668261 4.91688158 -2.63587247
## [43] 1.26380634 -1.51268787 2.72341410 -2.53571976 0.40023357 -1.45651728
## [49] 1.96189678 -1.25457296 0.33754592 0.69700952 -1.80130935 2.35056605
## [55] -1.82996718 0.68033034 -0.45960615 2.96964148 -1.55677490 0.95045611
## [61] -2.46561689 2.03871273 -0.90340584 0.33724499 -1.99301735 2.48284121
## [67] -1.18934823 0.57825620 -1.59265656 1.78651613 -0.89724060 1.32415293
## [73] -1.31437169 3.01094451 -2.22988966 0.32511054 -1.50652263 2.21259646
## [79] -1.13168073 0.92030137 0.44909383 -2.52457147 2.24285611 -1.10406307
## [85] 1.14091412 -1.78748197 2.25881474 -1.08769861 0.78368668 -2.23744690
## [91] 1.77770891 -0.78942575 0.73611350 -1.84379456 3.00067286 -0.89097046
## [97] 0.52173379 -1.37702686 1.85174086 -1.46616867 2.23662307 -1.10406307
## [103] 0.93537874 -2.10211929 0.33978208 0.26957429 -2.28142902 2.24285611
## [109] -0.77424347 1.02195098 -1.77856984 2.25742274 -2.30883686 0.36252159
## [115] -1.47636789 1.93079297 -0.93983072 0.62690665 -2.23744690 2.59194315
## [121] -0.99878531 0.97573271 -1.48484171 1.78651613 -1.35835382 2.21398846
## [127] -0.93102350 0.78368668 -2.29650639 0.14539498 -1.85270670
# Same PC1 scores obtained through predict() on the PCA object.
predict(gender_pca)[, "PC1"]
## [1] -0.26682087 0.05752185 -0.55914781 0.91413613 -1.90996836 2.96656117
## [7] -2.25879441 4.61777836 -1.16576456 0.57550930 -2.72394162 2.26549072
## [13] -1.73608462 0.37853423 -0.77434838 0.62701156 -1.89370880 0.08392662
## [19] 0.36143052 -0.30471507 -0.40100339 -1.44133500 2.30787102 -2.16117879
## [25] 4.40054684 -0.57369114 1.16236655 -2.68124659 3.07637905 -1.36238781
## [31] 0.95299319 -0.02824184 0.06986612 -1.80215351 0.15643836 -1.64961729
## [37] 1.66989406 -1.31192572 0.32618781 -0.41668261 4.91688158 -2.63587247
## [43] 1.26380634 -1.51268787 2.72341410 -2.53571976 0.40023357 -1.45651728
## [49] 1.96189678 -1.25457296 0.33754592 0.69700952 -1.80130935 2.35056605
## [55] -1.82996718 0.68033034 -0.45960615 2.96964148 -1.55677490 0.95045611
## [61] -2.46561689 2.03871273 -0.90340584 0.33724499 -1.99301735 2.48284121
## [67] -1.18934823 0.57825620 -1.59265656 1.78651613 -0.89724060 1.32415293
## [73] -1.31437169 3.01094451 -2.22988966 0.32511054 -1.50652263 2.21259646
## [79] -1.13168073 0.92030137 0.44909383 -2.52457147 2.24285611 -1.10406307
## [85] 1.14091412 -1.78748197 2.25881474 -1.08769861 0.78368668 -2.23744690
## [91] 1.77770891 -0.78942575 0.73611350 -1.84379456 3.00067286 -0.89097046
## [97] 0.52173379 -1.37702686 1.85174086 -1.46616867 2.23662307 -1.10406307
## [103] 0.93537874 -2.10211929 0.33978208 0.26957429 -2.28142902 2.24285611
## [109] -0.77424347 1.02195098 -1.77856984 2.25742274 -2.30883686 0.36252159
## [115] -1.47636789 1.93079297 -0.93983072 0.62690665 -2.23744690 2.59194315
## [121] -0.99878531 0.97573271 -1.48484171 1.78651613 -1.35835382 2.21398846
## [127] -0.93102350 0.78368668 -2.29650639 0.14539498 -1.85270670
#The two calls above give the same result. predict() is a good function to know.
# Treat gender as a categorical variable so it can drive grouped plots.
df_1$Gender <- as.factor(df_1$Gender)
# One plot per principal component: gender on the x-axis, the component score
# on the y-axis. The axis labels were previously swapped (x was labelled
# "PCi" and y "Gender"). `out` keeps the values returned by plot().
out <- sapply(1:4, function(i) {
  plot(df_1$Gender, gender_pca$x[, i],
       xlab = "Gender", ylab = paste0("PC", i))
})
# Scatter-plot matrix of the four score columns, drawing each individual's
# gender label in place of a point symbol.
pairs(gender_pca$x[, 1:4], ylim = c(-6, 4), xlim = c(-6, 4),
      panel = function(x, y, ...) {
        text(x, y, df_1$Gender)
      })
# Better Ways to Visualize
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library(ggfortify)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(corrplot)
## corrplot 0.92 loaded
library(devtools)
## Loading required package: usethis
# Correlation
# Scatter-plot matrix of the raw variables; point fill is picked from
# c("red", "blue") by the gender factor.
pairs.panels(
  df_1[, -1],
  gap = 0,
  bg = c("red", "blue")[df_1$Gender],
  pch = 21
)
# Same display for the principal-component scores.
pairs.panels(
  gender_pca$x,
  gap = 0,
  bg = c("red", "blue")[df_1$Gender],
  pch = 21
)
# Scree plot with the percentage labels printed on the bars.
fviz_eig(gender_pca, addlabels = TRUE)
# Variables plot, coloured by cos2.
fviz_pca_var(
  gender_pca,
  col.var = "cos2",
  gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
  repel = TRUE
)
# Individuals plot, coloured by cos2.
fviz_pca_ind(
  gender_pca,
  col.ind = "cos2",
  gradient.cols = c("#FFCC00", "#CC9933", "#660033", "#330033"),
  repel = TRUE
)
# Base-graphics biplot of scores and loadings.
biplot(gender_pca)
# ggfortify PCA plot with loading arrows, individuals coloured by gender.
# The previous call passed `labels = df_1$Gender`, which is not an argument of
# ggfortify's autoplot method for PCA objects, so gender never appeared in the
# plot. Passing the full data frame lets `colour` refer to the Gender column.
autoplot(gender_pca,
         data = df_1,
         colour = "Gender",
         loadings = TRUE)
# Different PCA Method.
# Run PCA() on the data without the first column (Gender); graph = FALSE
# suppresses its automatic plots.
res.pca <- PCA(df_1[, -1], graph = FALSE)
# Lists the components available inside the result object.
print(res.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 131 individuals, described by 4 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
# Visualize and Interpret PCA using these functions
#get_eigenvalue(res.pca): Extract the eigenvalues/variances of principal components
#fviz_eig(res.pca): Visualize the eigenvalues
#get_pca_ind(res.pca), get_pca_var(res.pca): Extract the results for individuals and variables, respectively.
#fviz_pca_ind(res.pca), fviz_pca_var(res.pca): Visualize the results individuals and variables, respectively.
#fviz_pca_biplot(res.pca): Make a biplot of individuals and variables.
# Eigenvalues with the percentage and cumulative percentage of variance;
# the parentheses print the table as well as storing it.
(eig.val <- get_eigenvalue(res.pca))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.07233937 76.8084842 76.80848
## Dim.2 0.68077633 17.0194082 93.82789
## Dim.3 0.22654730 5.6636825 99.49157
## Dim.4 0.02033701 0.5084251 100.00000
# Scree plot of the percentage of variance per dimension. The y-axis spans the
# full 0-100% range: Dim.1 alone explains 76.8% here, so the former upper
# limit of 50 cut its bar and label off.
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 100))
# Extract the PCA results for the variables.
var <- get_pca_var(res.pca)
# var$coord: coordinates of variables to create a scatter plot.
# var$cos2: quality of representation of the variables on the factor map,
#   calculated as the squared coordinates: var.cos2 = var.coord * var.coord.
# var$contrib: contributions (in percentage) of the variables to the
#   principal components. The contribution of a variable to a given component
#   is (var.cos2 * 100) / (total cos2 of the component).
var
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Coordinates of the variables on each dimension
head(var$coord)
## Dim.1 Dim.2 Dim.3 Dim.4
## Age 0.9081495 0.1362665 -0.39549227 0.016788749
## Height (cm) 0.9198100 -0.3453786 0.16064449 0.094109244
## Weight (kg) 0.9442068 -0.3027149 0.07513888 -0.105789161
## Income (USD) 0.7141618 0.6717775 0.19667399 -0.002691923
# Cos2: quality of representation on the factor map
head(var$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4
## Age 0.8247354 0.01856855 0.156414136 2.818621e-04
## Height (cm) 0.8460504 0.11928641 0.025806651 8.856550e-03
## Weight (kg) 0.8915265 0.09163630 0.005645852 1.119135e-02
## Income (USD) 0.5100270 0.45128507 0.038680660 7.246450e-06
# Contributions (in percent) of the variables to the principal components
head(var$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4
## Age 26.84389 2.727556 69.042596 1.38595680
## Height (cm) 27.53766 17.522114 11.391286 43.54893863
## Weight (kg) 29.01784 13.460558 2.492129 55.02947273
## Income (USD) 16.60061 66.289771 17.073988 0.03563184
#The plot below is also known as a variable correlation plot. It shows the relationships between all variables. It can be interpreted as follows:
#Positively correlated variables are grouped together.
#Negatively correlated variables are positioned on opposite sides of the plot origin (opposed quadrants).
#The distance between variables and the origin measures the quality of the variables on the factor map.
#Variables that are away from the origin are well represented on the factor map.
# Correlation circle
fviz_pca_var(res.pca, col.var = "black")
# Quality of representation: cos2 of every variable on every dimension.
# is.corr = FALSE because the matrix is not a correlation matrix.
corrplot(var$cos2, is.corr = FALSE)
# Total cos2 of variables on Dim.1 and Dim.2.
# A high cos2 indicates a good representation of the variable on the
# principal component; the variable then sits close to the circumference of
# the correlation circle. A low cos2 indicates that the variable is not
# perfectly represented by the PCs; it then sits close to the centre.
fviz_cos2(res.pca, choice = "var", axes = 1:2)
# Correlation circle coloured by cos2.
fviz_pca_var(
  res.pca,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)
# Change the transparency by cos2 values
fviz_pca_var(res.pca, alpha.var = "cos2")
# Contribution of every variable to every dimension.
corrplot(var$contrib, is.corr = FALSE)
# Contributions of variables to PC1
fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)
# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)
# Correlation circle coloured by contribution.
fviz_pca_var(
  res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
# Correlation circle with transparency set by contribution.
fviz_pca_var(res.pca, alpha.var = "contrib")
# Individuals plot, grouped and coloured by gender.
fviz_pca_ind(
  res.pca,
  geom.ind = "point", # show points only (but not "text")
  col.ind = df_1$Gender, # color by groups
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  addEllipses = TRUE, # Concentration ellipses
  legend.title = "Groups"
)
# Description of PC
# Describe dimensions 1 to 4 by the variables associated with them, using a
# 0.05 probability threshold.
res.desc <- dimdesc(res.pca, axes = 1:4, proba = 0.05)
# Description of dimension 1
res.desc$Dim.1
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Weight (kg) 0.9442068 4.453567e-64
## Height (cm) 0.9198100 2.933750e-54
## Age 0.9081495 1.274128e-50
## Income (USD) 0.7141618 1.011533e-21
# Description of dimension 2
res.desc$Dim.2
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Income (USD) 0.6717775 1.594051e-18
## Weight (kg) -0.3027149 4.408287e-04
## Height (cm) -0.3453786 5.340538e-05
# Description of dimension 3
res.desc$Dim.3
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Income (USD) 0.1966740 2.435390e-02
## Age -0.3954923 2.935294e-06
# Description of dimension 4
res.desc$Dim.4
# Graph of individuals
# Extract the PCA results for the individuals.
ind <- get_pca_ind(res.pca)
ind
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
#To get access to the different components, use this:
# Coordinates of individuals
head(ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 -0.26784514 -0.32364542 0.31308413 0.18818762
## 2 0.05774267 -1.65107514 1.60335683 -0.24913390
## 3 -0.56129426 1.43875295 -1.42109221 -0.40967362
## 4 0.91764530 -0.41061870 -0.20002795 0.01996029
## 5 -1.91730031 0.02343799 -0.06433671 0.02231308
## 6 2.97794916 -1.00122996 -0.39538442 0.37574034
# Quality of individuals
head(ind$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 0.2314796641 0.3379747378 0.316276870 0.1142687280
## 2 0.0006217994 0.5083821930 0.479420973 0.0115750347
## 3 0.0689028666 0.4527188097 0.441672745 0.0367055787
## 4 0.8011423723 0.1604122197 0.038066360 0.0003790476
## 5 0.9985911149 0.0001492273 0.001124411 0.0001352466
## 6 0.8721526735 0.0985883586 0.015374361 0.0138846072
# Contributions of individuals
head(ind$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 0.017824898 0.1174527482 0.33028735 1.32930240
## 2 0.000828425 3.0567360090 8.66223957 2.32973947
## 3 0.078278181 2.3211152817 6.80478379 6.29966853
## 4 0.209222897 0.1890608895 0.13481902 0.01495462
## 5 0.913355411 0.0006159779 0.01394722 0.01868791
## 6 2.203403711 1.1240663180 0.52675531 5.29928587
# Individuals plots driven by cos2: default plot, colour by cos2, point
# size by cos2, then both colour and size together.
cos2_gradient <- c("#00AFBB", "#E7B800", "#FC4E07")
fviz_pca_ind(res.pca)
fviz_pca_ind(
  res.pca,
  col.ind = "cos2",
  gradient.cols = cos2_gradient,
  repel = TRUE  # avoid text overlapping (slow if many points)
)
fviz_pca_ind(
  res.pca,
  pointsize = "cos2",
  pointshape = 21,
  fill = "#E7B800",
  repel = TRUE  # avoid text overlapping (slow if many points)
)
fviz_pca_ind(
  res.pca,
  col.ind = "cos2",
  pointsize = "cos2",
  gradient.cols = cos2_gradient,
  repel = TRUE  # avoid text overlapping (slow if many points)
)

# Bar plot of cos2 for the individuals
fviz_cos2(res.pca, choice = "ind")
# Total contribution of individuals on PC1 and PC2
fviz_contrib(res.pca, choice = "ind", axes = 1:2)
# Create a random continuous variable with exactly one value per individual
# in the PCA. The length is taken from the PCA result rather than hard-coded,
# so it cannot drift out of sync with the data (the previous comment said 23
# while the code used 131).
set.seed(123)  # fixed seed so the random colouring is reproducible
n_individuals <- nrow(get_pca_ind(res.pca)$coord)
my.cont.var <- rnorm(n_individuals)
# Color individuals by the continuous variable
fviz_pca_ind(
  res.pca,
  col.ind = my.cont.var,
  gradient.cols = c("blue", "yellow", "red"),
  legend.title = "Cont.Var"
)
# Individuals coloured by df_1$Gender, drawn three ways: default ellipses,
# ellipse.type = "confidence", and grouping via habillage with the "jco"
# palette.
group_palette <- c("#00AFBB", "#E7B800", "#FC4E07")
fviz_pca_ind(
  res.pca,
  geom.ind = "point",       # show points only (but not "text")
  col.ind = df_1$Gender,    # colour by groups
  palette = group_palette,
  addEllipses = TRUE,       # concentration ellipses
  legend.title = "Groups"
)
fviz_pca_ind(
  res.pca,
  geom.ind = "point",
  col.ind = df_1$Gender,
  palette = group_palette,
  addEllipses = TRUE,
  ellipse.type = "confidence",
  legend.title = "Groups"
)
fviz_pca_ind(
  res.pca,
  label = "none",             # hide individual labels
  habillage = df_1$Gender,    # colour by groups
  addEllipses = TRUE,         # concentration ellipses
  palette = "jco"
)
# Geometry and size options.
# Variables drawn as points with text labels
fviz_pca_var(res.pca, geom.var = c("point", "text"))
# Show individuals' text labels only
fviz_pca_ind(res.pca, geom.ind = "text")
# Change the size of arrows and labels
fviz_pca_var(
  res.pca,
  arrowsize = 1,
  labelsize = 5,
  repel = TRUE
)
# Change point size, shape and fill colour, plus the label size
fviz_pca_ind(
  res.pca,
  pointsize = 3,
  pointshape = 21,
  fill = "lightblue",
  labelsize = 5,
  repel = TRUE
)
# Compare the grouped individuals plot without and with the group mean
# points. Grouping is passed through `col.ind`, as in the other grouped
# plots of this script; the previous `group.ind` is not a documented
# argument of fviz_pca_ind(), so the points were presumably not grouped and
# `mean.point` had no groups to act on.
fviz_pca_ind(
  res.pca,
  geom.ind = "point",       # show points only (but not "text")
  col.ind = df_1$Gender,    # colour by groups
  legend.title = "Groups",
  mean.point = FALSE        # hide the group mean points
)
fviz_pca_ind(
  res.pca,
  geom.ind = "point",       # show points only (but not "text")
  col.ind = df_1$Gender,    # colour by groups
  legend.title = "Groups",
  mean.point = TRUE         # show the group mean points
)
# Variables plot with axes.linetype set to "blank"
fviz_pca_var(res.pca, axes.linetype = "blank")

# Build the individuals plot once, then restyle titles, axis labels, legend,
# theme and palette through ggpubr::ggpar().
ind.p <- fviz_pca_ind(res.pca, geom = "point", col.ind = df_1$Gender)
ggpubr::ggpar(
  ind.p,
  title = "Principal Component Analysis",
  subtitle = "Gender data set",
  caption = "Source: factoextra",
  xlab = "PC1",
  ylab = "PC2",
  legend.title = "Gender",
  legend.position = "top",
  ggtheme = theme_gray(),
  palette = "jco"
)
# Biplot of individuals (coloured by gender) and variables.
# The stray trailing comma after `col.var` was removed: it passed an empty
# extra argument to the call, which is fragile and not intended.
fviz_pca_biplot(
  res.pca,
  repel = TRUE,
  col.ind = df_1$Gender,
  col.var = "#2E9FDF"  # variables colour
)

# Same biplot with the "jco" palette and ellipses per gender, labelling
# only the variables.
fviz_pca_biplot(
  res.pca,
  col.ind = df_1$Gender,
  palette = "jco",
  addEllipses = TRUE,
  label = "var",
  col.var = "black",
  repel = TRUE,
  legend.title = "Gender"
)
# Biplot with individuals drawn as filled points (shape 21), filled by
# gender with a black outline.
# NOTE(review): no `col.var` grouping is supplied in this call, so the
# "Clusters" colour legend title and the "npg" colour palette may have
# nothing to act on - confirm intent.
fviz_pca_biplot(
  res.pca,
  # Individuals: fill by groups
  geom.ind = "point",
  pointshape = 21,
  pointsize = 2.5,
  fill.ind = df_1$Gender,
  col.ind = "black",
  legend.title = list(fill = "Gender", color = "Clusters"),
  repel = TRUE  # avoid label overplotting
) +
  ggpubr::fill_palette("jco") +   # individual fill colours
  ggpubr::color_palette("npg")    # variable colours
# Biplot combining gender-filled individuals (with ellipses) and variables
# whose colour and transparency are both mapped to "contrib".
fviz_pca_biplot(
  res.pca,
  # Individuals
  geom.ind = "point",
  fill.ind = df_1$Gender,
  col.ind = "black",
  pointshape = 21,
  pointsize = 2,
  palette = "jco",
  addEllipses = TRUE,
  # Variables
  alpha.var = "contrib",
  col.var = "contrib",
  gradient.cols = "RdYlBu",
  legend.title = list(
    fill = "Gender",
    color = "Contrib",
    alpha = "Contrib"
  )
)
## http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/